In [1]:
# ERP Classification Example
# roughly based on /svn/bbci/toolbox/demos/stdERPclassification.m

In [2]:
from __future__ import division

import numpy as np
from matplotlib import pyplot as plt
from scipy.signal import butter
from sklearn.lda import LDA
from sklearn.neighbors import KNeighborsClassifier
from sklearn import cross_validation

from wyrm import processing as proc
from wyrm import plot
from wyrm import io

In [3]:
# load the BrainVision data into a continuous Data object
cnt = io.load_brain_vision_data('data/OnlineTrainFileVPfaz.vhdr')
# remove unneeded channels (EOG and mastoids)
cnt = proc.remove_channels(cnt, ['EOG.*', 'Mas.*'])
# band pass filter the data with a 2-40Hz Butterworth filter,
# cutoffs normalized to the Nyquist frequency
fn = cnt.fs / 2
b, a = butter(4, [2 / fn, 40 / fn], btype='band')
cnt = proc.lfilter(cnt, b, a)
# subsample to 100Hz
cnt = proc.subsample(cnt, 100)
# epoch the data: markers 'S 2'-'S 6' are the standards,
# 'S 12'-'S 16' the deviants; epochs span 0-800ms relative to each marker
mrk_def = {'std': ['S %2i' % i for i in range(2, 7)],
           'dev': ['S %2i' % i for i in range(12, 17)]
           }
epo = proc.segment_dat(cnt, mrk_def, [0, 800])



In [4]:
# create the feature vectors
# be careful not to leak information from the test set into the training
# set, e.g. by fitting preprocessing parameters on the full data before splitting
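
As a quick sketch (not part of the original demo, using a hypothetical mean/variance normalization) of what "no leakage" means in practice: split first, then estimate any data-dependent parameters on the training portion only and reuse them on the test portion.

import numpy as np

data = np.random.randn(100, 10)          # 100 epochs, 10 features
split = data.shape[0] // 2
train, test = data[:split], data[split:]
# estimate the normalization parameters on the training half only...
mean, std = train.mean(axis=0), train.std(axis=0)
train = (train - mean) / std
# ...and reuse the *training* statistics on the test half
test = (test - mean) / std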

In [5]:
# calculate the jumping means over the intervals 80-350ms and 360-800ms
epo = proc.jumping_means(epo, [[80, 350], [360, 800]])
# flatten each epoch into a feature vector
fv = proc.create_feature_vectors(epo)

Simple classification with 50/50 split

To create the feature vectors we simply calculate the jumping means over the time axis for each channel and concatenate them. We then split each class 50/50, train on the first halves, and test on the second halves.
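
As a rough illustration (the indices assume the 100Hz sampling rate and the [0, 800]ms epochs from above; the toy array is not part of the demo), the jumping means for a single epoch boil down to this:

import numpy as np

epoch = np.random.randn(80, 2)                    # 80 samples (0-800ms), 2 channels
ival1 = epoch[8:35].mean(axis=0)                  # mean over 80-350ms, per channel
ival2 = epoch[36:].mean(axis=0)                   # mean over 360-800ms, per channel
feature_vector = np.concatenate([ival1, ival2])   # shape: (4,)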


In [6]:
# select the epochs of each class and split them 50/50
std = proc.select_classes(fv, [1]).data
dev = proc.select_classes(fv, [0]).data
std_split = std.shape[0] // 2
dev_split = dev.shape[0] // 2
std1, std2 = std[:std_split], std[std_split:]
dev1, dev2 = dev[:dev_split], dev[dev_split:]
# first halves for training, second halves for testing (std: 0, dev: 1)
train = np.append(std1, dev1, axis=0)
train_labels = np.append(np.zeros(len(std1)), np.ones(len(dev1)))
test = np.append(std2, dev2, axis=0)
test_labels = np.append(np.zeros(len(std2)), np.ones(len(dev2)))

In [7]:
# train an LDA classifier on the training set
clf = LDA()
clf.fit(train, train_labels)
# clf.predict(data) would return the predicted labels;
# score gives the mean accuracy on the test set
print 'LDA Classification Accuracy: %.2f' % clf.score(test, test_labels)


LDA Classification Accuracy: 0.76

In [8]:
# Just for the lulz: KNN
clf = KNeighborsClassifier()
clf.fit(train, train_labels)
print 'KNN Classification Accuracy: %.2f' % clf.score(test, test_labels)


KNN Classification Accuracy: 0.81

Cross Validation

Here we test 10-fold cross validation for the jumping-means based classification. "Stratified" means that the percentage of each class is preserved in the training and test sets.
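
A toy example (not from the original notebook) makes the stratification visible: with 80 labels of one class and 20 of the other, every test fold keeps roughly the same 80/20 ratio.

labels = np.array([0] * 80 + [1] * 20)
for train_i, test_i in cross_validation.StratifiedKFold(labels, 10):
    print 'class 1 ratio in test fold: %.2f' % labels[test_i].mean()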


In [9]:
# 10-fold stratified cross validation; fv.axes[0] holds the class labels
skf = cross_validation.StratifiedKFold(fv.axes[0], 10)
for train_i, test_i in skf:
    train = fv.data[train_i]
    train_labels = fv.axes[0][train_i]
    test = fv.data[test_i]
    test_labels = fv.axes[0][test_i]
    clf_lda, clf_knn = LDA(), KNeighborsClassifier()
    clf_lda.fit(train, train_labels)
    clf_knn.fit(train, train_labels)
    print 'LDA/KNN Classification Accuracy: %.2f/%.2f' % (clf_lda.score(test, test_labels), clf_knn.score(test, test_labels))


LDA/KNN Classification Accuracy: 0.77/0.79
LDA/KNN Classification Accuracy: 0.88/0.80
LDA/KNN Classification Accuracy: 0.79/0.79
LDA/KNN Classification Accuracy: 0.78/0.82
LDA/KNN Classification Accuracy: 0.80/0.82
LDA/KNN Classification Accuracy: 0.83/0.82
LDA/KNN Classification Accuracy: 0.73/0.77
LDA/KNN Classification Accuracy: 0.80/0.80
LDA/KNN Classification Accuracy: 0.79/0.83
LDA/KNN Classification Accuracy: 0.82/0.78
